import requests as r
import re
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import seaborn as sns
import time
import matplotlib.pyplot as plt
%matplotlib inline
sns.set()
import warnings
warnings.simplefilter('ignore') #ignore the warnings, not the errors
# happy to say everything will work right now, not like the last time

Everyone is the same - no need for machine learning to predict
Types of data that can influence my mood:
Sharing personal info:

|
|
|
# Load the Daylio mood export and turn it into one row per entry with
# 0/1 activity indicator columns.
_daylio = pd.read_csv('python_data/Daylio_01.2019_07.2020_date.mood.mood_text.activities.csv')
# The activities cell is a '|'-separated list: strip the blanks, then expand
# it into one dummy column per activity.
_activity_flags = _daylio['activities'].str.replace(' ','').str.get_dummies(sep='|')
mood = pd.concat([_daylio[['full_date','mood_num', 'mood']], _activity_flags], axis=1)
# Only the columns the author is comfortable sharing are kept.
_shared_columns = [
    'full_date', 'mood_num', 'mood','4pda/search', 'biking(20m+)', 'cleaning',
    'cook(nomicrow)', 'daysleep', 'drums(GHincluded)',
    'eatingout', 'films', 'friends', 'gaming', 'italiano', 'laundry',
    'maladie', 'party/bar', 'people(1h+)', 'personalwork',
    'reading', 'shopping(big)', 'shopping(courses)', 'sport',
    'stressventre', 'sun(onskin)', 'television', 'travel',
    'volley', 'walking(30m+)', 'workplacework',
]
# More readable names for some of the columns.
_readable_names = {
    'full_date': "date",
    "mood_num": "mood_float",
    'sun(onskin)':'sunnyday',
    'cook(nomicrow)':'cook',
    'biking(20m+)':'biking',
    'drums(GHincluded)':'drums',
    'walking(30m+)':'walking',
    '4pda/search': 'repairing',
    'volley': "volley_ball",
    'shopping(courses)': "supermarket",
    'people(1h+)': "people",
    'maladie': "illness",
    'shopping(big)':'shopping',
    'stressventre':'stress',
}
mood = mood[_shared_columns].rename(columns=_readable_names)
# Parse the dates, order the rows chronologically and renumber them from 0.
mood['date'] = pd.to_datetime(mood['date'])
mood = mood.sort_values(by='date').reset_index(drop=True)
mood.head()

|
|
|
# Three Monefy expense exports; judging by the file names they differ in
# whether bills and travel ("Voyage") spending are included.
money = pd.read_csv('python_data/Monefy_01.2019_07.2020_Bills_CarteResto_Voyage.csv')
money_sp = pd.read_csv('python_data/Monefy_01.2019_07.2020_NO_Bills_NO_Voyage_With_CarteResto.csv')
money_no_bills = pd.read_csv('python_data/Monefy_01.2019_07.2020_NO_Bills_With_Voyage_With_CarteResto.csv')
# Parse the date column of every export into datetimes.
for _expenses in (money, money_sp, money_no_bills):
    _expenses['date'] = pd.to_datetime(_expenses['date'])
money.head()

|
|
|
# Activity export from the Mi Band data folder: parse the date and add the
# calendar year as its own column.
steps_lol = (
    pd.read_csv('miband_data/ACTIVITY.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .assign(year=lambda frame: frame['date'].dt.year)
)
# Number of rows per year (only displayed when run interactively).
steps_lol['year'].value_counts()
# Second activity export (ACTIVITY_MINUTE.csv); this is the table the step
# totals are later summed from.
steps = pd.read_csv('miband_data/ACTIVITY_MINUTE.csv')
steps['date'] = pd.to_datetime(steps['date'])
steps.head()

import plotly.express as px
# Donut chart with one slice per mood label.
# NOTE(review): values='mood_float' sizes each slice by the summed mood score
# of that label, not by the number of entries - confirm this is intended.
fig = px.pie(
    mood,
    names='mood',
    values='mood_float',
    title='Pie chart for Eldiias',
    height=400,
    hole=.3,
)
# Print the percentage and the label inside every slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()
|
|
|
|
money.head()
# Label every expense row as either 'Bills' or 'Not_Bills' based on its category.
_is_bill = money['category'] == 'Bills'
money['Bills'] = np.where(_is_bill, 'Bills', 'Not_Bills')
import plotly.express as px
# Pie chart of the total amount per label.
fig = px.pie(
    money,
    names='Bills',
    values='amount',
    title='My spendings',
    height=400,
)
# Print the percentage and the label inside every slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()

# Build `mm` (mood + money spent per day) and `mm_st` (`mm` + steps per day).

# Sum all the purchases per day; built once and reused for the join below.
daily_spend = money.pivot_table(index='date', values='amount', aggfunc='sum')
daily_spend.head()
# Add the daily totals to the mood dataset (left join on the date column).
mm = mood.join(daily_spend, on='date')
# Days without any recorded purchase count as 0 spent.
mm['amount_spent'] = mm['amount'].fillna(0)
# NOTE(review): the duplicate row is removed by its hard-coded index label 1;
# re-check this if the input export changes.
mm.drop(index=1, inplace=True)  # drop a duplicate row
# The raw amount and the textual mood label are no longer needed.
mm.drop(columns=['amount', 'mood'], inplace=True)
mm.reset_index(inplace=True, drop=True)

# Sum all the steps per day, restricted to the analysed date range.
steps_pivot = steps.pivot_table(index='date', values='steps', aggfunc='sum')
steps_pivot = steps_pivot.query('date>="2019-01-01" & date<="2020-07-31"')
# Add the daily step totals to the mood + money dataset.
mm_st = mm.join(steps_pivot, on='date')
# Fill days without step data with the average. Assign the result back:
# a chained `mm_st['steps'].fillna(..., inplace=True)` works on a temporary
# Series and leaves the frame untouched under pandas Copy-on-Write.
mm_st['steps'] = mm_st['steps'].fillna(mm_st['steps'].mean())
mm_st.head()

mm
# recognize if an activity adds something to my mood
mm.pivot_table(index='sport', values='mood_float', aggfunc='mean')
# Activity indicator columns (0/1) to evaluate.
cols = ['repairing', 'biking', 'cleaning', 'cook',
        'daysleep', 'drums', 'eatingout', 'films', 'friends', 'gaming',
        'italiano', 'laundry', 'illness', 'party/bar', 'people', 'personalwork',
        'reading', 'shopping', 'supermarket', 'sport', 'stress',
        'sunnyday', 'television', 'travel', 'volley_ball', 'walking',
        'workplacework']
# Impact of every activity on the mood: mean mood on rows where the flag is 1
# minus mean mood on rows where it is 0, times 100.
# Rows are collected in a list and the frame is built once, because
# DataFrame.append no longer exists in pandas 2.x.
_bonus_rows = []
for col in cols:
    # Mean mood per flag value, computed once per activity.
    _mean_by_flag = mm.pivot_table(index=col, values='mood_float', aggfunc='mean')['mood_float']
    _bonus_rows.append({
        'Activity': col,
        'Mood_bonus': (_mean_by_flag.loc[1] - _mean_by_flag.loc[0]) * 100,
    })
mood_inc = pd.DataFrame(_bonus_rows, columns=['Activity', 'Mood_bonus'])
# Share of rows (in percent) on which each activity is flagged.
mood_occ = mm[cols].mean() * 100
# Two columns: 'index' (activity name) and 'Mood_occ'.
mood_df = mood_occ.rename('Mood_occ').reset_index()
# Pair bonus and occurrence by activity name rather than by row position.
mood_inc_occ = mood_inc.merge(mood_df, left_on='Activity', right_on='index')
mood_inc_occ = mood_inc_occ[['Activity', 'Mood_bonus', 'Mood_occ']]
mood_inc_occ.sort_values(by='Mood_bonus', ascending=False, inplace=True)
mood_inc_occ = mood_inc_occ.round(1)
# Top 6 activities by mood bonus.
mood_inc_occ.head(6)

|
|
# What amount of steps in a day makes me happy?
# Mean daily steps for each mood value, drawn as a bar chart.
_steps_by_mood = mm_st.pivot_table(index='mood_float', values='steps', aggfunc='mean')
_steps_ax = _steps_by_mood.plot(kind='bar', xlabel='Mood', ylabel='Steps', figsize=(12,8))
_steps_ax.legend(['I like to walk']);

# What amount of money spent in a day makes me happy?
# Mean daily spending for each mood value, drawn as a bar chart.
_spend_by_mood = mm.pivot_table(index='mood_float', values='amount_spent', aggfunc='mean')
_spend_ax = _spend_by_mood.plot(kind='bar', xlabel='Mood', ylabel='Money', figsize=(12,8))
_spend_ax.legend(['Does anyone need a trendline here?']);

# Calendar features derived from the date column.
mm['weekday']=mm.date.dt.weekday
mm['month']=mm.date.dt.month
mm['day_of_month']=mm.date.dt.day
mm['weekend'] = np.where(mm['weekday']<5, 0, 1) #weekend or not
#create normalized dataframe
mmn=mm.copy()
#normalize 2 columns - float and amount (z-score: subtract the mean, divide by the std)
mmn['mood_float']=(mm.mood_float-mm.mood_float.mean())/mm.mood_float.std()
mmn['amount_spent']=(mm.amount_spent-mm.amount_spent.mean())/mm.amount_spent.std()
mmn.drop('date', axis=1, inplace=True)
y=mmn.mood_float
X=mmn.drop('mood_float',axis=1)
# Project the features onto 7 principal components.
# NOTE(review): the calendar columns are not scaled, so they may dominate the
# components - confirm this is intended.
from sklearn.decomposition import PCA
pca=PCA(7)
pca.fit(X)
x_new=pca.transform(X)
pca.explained_variance_ratio_.sum()
from statsmodels.api import OLS
from statsmodels.api import add_constant
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
y
# Integer class target: the mood score doubled and cast to int, computed once.
mood_class=(mm.mood_float*2).astype(int)
# Hold out a quarter of the rows. An unpruned tree memorises its training
# rows, so scoring it on them (as before) always looks like a perfect fit.
x_train, x_test, mood_train, mood_test = train_test_split(
    x_new, mood_class, test_size=0.25, random_state=0)
kkk=DecisionTreeClassifier(random_state=0).fit(x_train, mood_train)
mood_pred=kkk.predict(x_test)
# Predicted vs. actual class distribution on the held-out rows.
plt.hist(mood_pred)
plt.show()
mood_test.hist()
mm.mood_float.unique()
mm.mood_float/5
from sklearn.metrics import r2_score
r2_score(mood_test, mood_pred)
from sklearn.metrics import confusion_matrix
# Confusion matrix on rows the tree has not seen; `labels` keeps one
# row/column per mood class even if a class is absent from the held-out part.
confusion_matrix(mood_test, mood_pred, labels=np.sort(mood_class.unique()))
pca.components_
Confusion Matrix
DecisionTreeClassifier based on data without transformations
# apparently, I can get a decision tree
# function to predict my mood based on parameters - must work with a group of parameters
# I can run Monte Carlo test on 1 million lines to get the activities

#recap
# Sleep export from the Mi Band data folder.
sleep = pd.read_csv('miband_data/SLEEP.csv')
sleep['date'] = pd.to_datetime(sleep['date'])
# Keep the first of any fully identical rows.
sleep = sleep.drop_duplicates()
# Deep plus shallow sleep, divided by 60.
sleep['sleep_time'] = (sleep['deepSleepTime'] + sleep['shallowSleepTime'])/60
#data is actually limited by miband to 2019 and 2020, here I will limit it to the same limit as before
sleep = sleep.query('date<="2020-07-31"')
# Mean sleep time per date (same pivot pattern as the other tables).
sleep_pivot = sleep.pivot_table(index='date', values='sleep_time', aggfunc='mean')
# Attach it to the mood table.
mm_sl = mm.join(sleep_pivot, on='date')
# Zero sleep values are replaced by the column median; rows without sleep
# data after the join stay NaN.
_sleep_col = mm_sl['sleep_time']
mm_sl['sleep_time_right'] = _sleep_col.mask(_sleep_col == 0, _sleep_col.median())
# Mean corrected sleep time for each mood value.
_sleep_by_mood = mm_sl.pivot_table(index='mood_float', values='sleep_time_right', aggfunc='mean')
_sleep_ax = _sleep_by_mood.plot(kind='bar', xlabel='Mood', ylabel='Sleep', figsize=(12,8))
_sleep_ax.legend(['I do not care about sleeping']);
# Box plot of the sleep times.
sleep['sleep_time'].plot(kind='box')
# Heart-rate export from the Mi Band data folder.
pulse = pd.read_csv('miband_data/HEARTRATE_AUTO.csv')
#pulse.drop_duplicates(subset=None, keep='first', inplace=True) - no duplicates
pulse['date'] = pd.to_datetime(pulse['date'])
pulse['time'] = pd.to_datetime(pulse['time'])
pulse = pulse.sort_values(by='date')
# Mean heart rate per date, shown as a box plot and reused for the join.
pulse_pivot = pulse.pivot_table(index='date', values='heartRate', aggfunc='mean')
pulse_pivot.plot(kind='box')
# Attach to the mood table. A lot of data is missing, so rows with any
# missing value are dropped instead of being filled with an average.
mm_pu = mm.join(pulse_pivot, on='date').dropna()
# Mean heart rate for each mood value; the y axis is limited to 71-81.
_pulse_by_mood = mm_pu.pivot_table(index='mood_float', values='heartRate', aggfunc='mean')
_pulse_ax = _pulse_by_mood.plot(kind='bar', xlabel='Mood', ylabel='Pulse', figsize=(12,8))
_pulse_ax.set_ylim(71,81);
# Heart rate against the parsed time column.
sns.lineplot(data=pulse, x="time", y="heartRate")
import plotly
plotly.offline.init_notebook_mode()